library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("leaflet")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(leaflet)
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("ggmap")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("sf")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("osmdata")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("rosm")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(rosm)
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("ggspatial")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(ggspatial)
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("prettymapr")
## 
## The downloaded binary packages are in
##  /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(prettymapr)
## 
## Attaching package: 'prettymapr'
## 
## The following objects are masked from 'package:rosm':
## 
##     makebbox, zoombbox
## 
## The following objects are masked from 'package:ggmap':
## 
##     clear_geocode_cache, geocode
zaehlstellen <- read_csv("data/dauerzaehlstellen_location.csv")
## Rows: 87 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): ZNAME, STRNR, RICHTUNG_1, RICHTUNG_2, BEZIRK_NAME
## dbl (6): ZNR, LONGITUDE, LATITUDE, BEZIRK_PLZ, BEZIRK_NR, BEZIRK_CODE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
verkehr <- read_csv("data/dauerzaehlstellen_data.csv")
## Rows: 40418 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (4): ZNAME, STRTYP, RINAME, FZTYP
## dbl  (11): ZNR, STRNR, DTVMS, DTVMF, DTVMO, DTVDD, DTVFR, DTVSA, DTVSF, TVMA...
## date  (2): DATUM, TVMAXT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Nur Einträge mit RINAME == "Gesamt" => Auffahrten und Ausfahrten werden nicht berücksichtigt
gesamt <- verkehr %>% filter(RINAME == "Gesamt")

gesamt_kfz <- gesamt %>% filter(FZTYP == "Kfz")
gesamt_lkw <- gesamt %>% filter(FZTYP == "LkwÄ")

gesamt_kfz$ZNR <- as.integer(gesamt_kfz$ZNR)
gesamt_lkw$ZNR <- as.integer(gesamt_lkw$ZNR)
zaehlstellen$ZNR <- as.integer(zaehlstellen$ZNR)

# Mergen mit Koordinaten durch die logs
gesamt_kfz_geo <- inner_join(gesamt_kfz, zaehlstellen, by = "ZNR")
gesamt_lkw_geo <- inner_join(gesamt_lkw, zaehlstellen, by = "ZNR")
ggplot() +
  geom_point(data = gesamt_kfz_geo, aes(x = as.numeric(LONGITUDE), y = as.numeric(LATITUDE)), 
             color = "red", alpha = 0.6, size = 2) +
  labs(title = "Zählstellen Wien", x = "Längengrad", y = "Breitengrad") +
  theme_minimal()

# Clusteranalyse (Features) gesamte Woche
cluster_features <- c("DTVMO", "DTVDD", "DTVFR", "DTVSA", "DTVSF")

# Daten bereinigen
kfz_data <- gesamt_kfz_geo %>%
  select(all_of(cluster_features)) %>%
  mutate_all(as.numeric) %>%
  drop_na()

lkw_data <- gesamt_lkw_geo %>%
  select(all_of(cluster_features)) %>%
  mutate_all(as.numeric) %>%
  drop_na()

kfz_scaled <- scale(kfz_data)
lkw_scaled <- scale(lkw_data)

# Elbow-Methode kfz
fviz_nbclust(kfz_scaled, kmeans, method = "wss") +
  labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) Kfz", x = "Anzahl Cluster (k)", y = "WSS")

# Elbow-Methode lkw
fviz_nbclust(lkw_scaled, kmeans, method = "wss") +
  labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) LkwÄ", x = "Anzahl Cluster (k)", y = "WSS")

# K-Means Cluster
kfz_cluster <- kmeans(kfz_scaled, centers = 3, nstart = 25)
lkw_cluster <- kmeans(lkw_scaled, centers = 3, nstart = 25)

# Cluster-Zuordnung hinzufügen
gesamt_kfz_geo_clean <- gesamt_kfz_geo[complete.cases(kfz_data), ]
gesamt_kfz_geo_clean$Cluster <- factor(kfz_cluster$cluster)

gesamt_lkw_geo_clean <- gesamt_lkw_geo[complete.cases(lkw_data), ]
gesamt_lkw_geo_clean$Cluster <- factor(lkw_cluster$cluster)
print(gesamt_kfz_geo_clean)
## # A tibble: 6,402 × 28
##    DATUM        ZNR ZNAME.x  STRTYP STRNR.x RINAME FZTYP DTVMS DTVMF DTVMO DTVDD
##    <date>     <int> <chr>    <chr>    <dbl> <chr>  <chr> <dbl> <dbl> <dbl> <dbl>
##  1 2024-12-01  1075 Reichsb… B            8 Gesamt Kfz   31051 35485 32919 36601
##  2 2024-12-01  1078 Westbah… B          221 Gesamt Kfz   66551 68354 65484 68342
##  3 2024-12-01  1089 Florids… B          226 Gesamt Kfz   23962 26654 25003 27432
##  4 2024-12-01  1096 Brigitt… B           14 Gesamt Kfz   22181 25033 23812 25772
##  5 2024-12-01  1131 Karlspl… B            1 Gesamt Kfz   32842 36038 33930 36702
##  6 2024-12-01  1170 Donauka… B          227 Gesamt Kfz   59181 62840 60469 62766
##  7 2024-12-01  1177 Handels… B           14 Gesamt Kfz   21606 24222 23049 24871
##  8 2024-12-01  1179 Brunner… B           12 Gesamt Kfz   28079 32240 31695 32305
##  9 2024-12-01  1180 Hochstr… B           13 Gesamt Kfz    9884 11053 10524 11160
## 10 2024-12-01  1181 Breiten… B           13 Gesamt Kfz   11815 13057 12626 13158
## # ℹ 6,392 more rows
## # ℹ 17 more variables: DTVFR <dbl>, DTVSA <dbl>, DTVSF <dbl>, TVMAX <dbl>,
## #   TVMAXT <date>, ISTCOVID19 <dbl>, ZNAME.y <chr>, STRNR.y <chr>,
## #   RICHTUNG_1 <chr>, RICHTUNG_2 <chr>, LONGITUDE <dbl>, LATITUDE <dbl>,
## #   BEZIRK_NAME <chr>, BEZIRK_PLZ <dbl>, BEZIRK_NR <dbl>, BEZIRK_CODE <dbl>,
## #   Cluster <fct>
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_kfz_geo_clean$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_kfz_geo_clean) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    color = ~pal(Cluster),
    label = ~paste("Cluster:", Cluster),
    radius = 3,
    fillOpacity = 0.7
  )
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_lkw_geo_clean$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_lkw_geo_clean) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    color = ~pal(Cluster),
    label = ~paste("Cluster:", Cluster),
    radius = 3,
    fillOpacity = 0.7
  )
# Plot Cluster Kfz
ggplot(gesamt_kfz_geo_clean, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
  geom_point(size = 2, alpha = 0.7) +
  labs(title = "Clusteranalyse Kfz – Zählstellen Wien") +
  theme_minimal()

# Plot Cluster Lkw
ggplot(gesamt_lkw_geo_clean, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
  geom_point(size = 2, alpha = 0.7) +
  labs(title = "Clusteranalyse LkwÄ – Zählstellen Wien") +
  theme_minimal()

# Clusteranalyse (Features) der Tage Montag bis Freitag
cluster_features_mo_fr <- c("DTVMO", "DTVDD", "DTVFR")

# Daten bereinigen
kfz_data_mo_fr <- gesamt_kfz_geo %>%
  select(all_of(cluster_features_mo_fr)) %>%
  mutate_all(as.numeric) %>%
  drop_na()

lkw_data_mo_fr <- gesamt_lkw_geo %>%
  select(all_of(cluster_features_mo_fr)) %>%
  mutate_all(as.numeric) %>%
  drop_na()

kfz_scaled_mo_fr <- scale(kfz_data_mo_fr)
lkw_scaled_mo_fr <- scale(lkw_data_mo_fr)

# Elbow-Methode kfz
fviz_nbclust(kfz_scaled_mo_fr, kmeans, method = "wss") +
  labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) Kfz", x = "Anzahl Cluster (k)", y = "WSS")

# Elbow-Methode lkw
fviz_nbclust(lkw_scaled_mo_fr, kmeans, method = "wss") +
  labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) LkwÄ", x = "Anzahl Cluster (k)", y = "WSS")

# K-Means Cluster
kfz_cluster_mo_fr <- kmeans(kfz_scaled_mo_fr, centers = 3, nstart = 25)
lkw_cluster_mo_fr <- kmeans(lkw_scaled_mo_fr, centers = 4, nstart = 25)

# Cluster-Zuordnung hinzufügen
gesamt_kfz_geo_clean_mo_fr <- gesamt_kfz_geo[complete.cases(kfz_data_mo_fr), ]
gesamt_kfz_geo_clean_mo_fr$Cluster <- factor(kfz_cluster_mo_fr$cluster)

gesamt_lkw_geo_clean_mo_fr <- gesamt_lkw_geo[complete.cases(lkw_data_mo_fr), ]
gesamt_lkw_geo_clean_mo_fr$Cluster <- factor(lkw_cluster_mo_fr$cluster)
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_kfz_geo_clean_mo_fr$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_kfz_geo_clean_mo_fr) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    color = ~pal(Cluster),
    label = ~paste("Cluster:", Cluster),
    radius = 3,
    fillOpacity = 0.7
  )
farben <- c("1" = "red", "2" = "green", "3" = "blue", "4" = "purple")
pal <- colorFactor(palette = farben, domain = gesamt_lkw_geo_clean_mo_fr$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_lkw_geo_clean_mo_fr) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    color = ~pal(Cluster),
    label = ~paste("Cluster:", Cluster),
    radius = 3,
    fillOpacity = 0.7
  )
# Plot Cluster Kfz
ggplot(gesamt_kfz_geo_clean_mo_fr, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
  geom_point(size = 2, alpha = 0.7) +
  labs(title = "Clusteranalyse Kfz – Zählstellen Wien") +
  theme_minimal()

# Plot Cluster Lkw
ggplot(gesamt_lkw_geo_clean_mo_fr, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
  geom_point(size = 2, alpha = 0.7) +
  labs(title = "Clusteranalyse LkwÄ – Zählstellen Wien") +
  theme_minimal()

# Clusteranalyse (Features) nur Wochenende und Feiertage
cluster_features_we <- c("DTVSA", "DTVSF")

# Daten bereinigen
kfz_data_we <- gesamt_kfz_geo %>%
  select(all_of(cluster_features_we)) %>%
  mutate_all(as.numeric) %>%
  drop_na()

lkw_data_we <- gesamt_lkw_geo %>%
  select(all_of(cluster_features_we)) %>%
  mutate_all(as.numeric) %>%
  drop_na()

kfz_scaled_we <- scale(kfz_data_we)
lkw_scaled_we <- scale(lkw_data_we)

# Elbow-Methode kfz
fviz_nbclust(kfz_scaled_we, kmeans, method = "wss") +
  labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) Kfz", x = "Anzahl Cluster (k)", y = "WSS")

# Elbow-Methode lkw
fviz_nbclust(lkw_scaled_we, kmeans, method = "wss") +
  labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) LkwÄ", x = "Anzahl Cluster (k)", y = "WSS")

# K-Means Cluster
kfz_cluster_we <- kmeans(kfz_scaled_we, centers = 5, nstart = 25)
lkw_cluster_we <- kmeans(lkw_scaled_we, centers = 3, nstart = 25)

# Cluster-Zuordnung hinzufügen
gesamt_kfz_geo_clean_we <- gesamt_kfz_geo[complete.cases(kfz_data_we), ]
gesamt_kfz_geo_clean_we$Cluster <- factor(kfz_cluster_we$cluster)

gesamt_lkw_geo_clean_we <- gesamt_lkw_geo[complete.cases(lkw_data_we), ]
gesamt_lkw_geo_clean_we$Cluster <- factor(lkw_cluster_we$cluster)
farben <- c("1" = "red", "2" = "green", "3" = "turquoise","4" = "blue", "5" = "purple")
pal <- colorFactor(palette = farben, domain = gesamt_kfz_geo_clean_we$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_kfz_geo_clean_we) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    color = ~pal(Cluster),
    label = ~paste("Cluster:", Cluster),
    radius = 3,
    fillOpacity = 0.7
  )
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_lkw_geo_clean_we$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_lkw_geo_clean_we) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    color = ~pal(Cluster),
    label = ~paste("Cluster:", Cluster),
    radius = 3,
    fillOpacity = 0.7
  )
# Plot Cluster Kfz
ggplot(gesamt_kfz_geo_clean_we, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
  geom_point(size = 2, alpha = 0.7) +
  labs(title = "Clusteranalyse Kfz – Zählstellen Wien") +
  theme_minimal()

# Plot Cluster Lkw
ggplot(gesamt_lkw_geo_clean_we, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
  geom_point(size = 2, alpha = 0.7) +
  labs(title = "Clusteranalyse LkwÄ – Zählstellen Wien") +
  theme_minimal()